import scrapy


class AppSpider(scrapy.Spider):
    """Crawl Dangdang search results for the book 查理九世 ("Charlie IX").

    Flow: replay a captured login cookie, request every search-result page,
    follow each product link, and append the product title to ./cl.txt.
    """

    name = "app"

    # Last search-result page to request (pages are 1-based).
    MAX_PAGE = 100

    # Scrapy logs a warning when a generator callback contains a
    # `return <value>` statement; stub the warning helper out so the
    # crawl log stays clean.  The submodules are attribute-accessible
    # here because Scrapy imports them itself during `import scrapy`.
    def warn_on_generator_with_return_value_stub(spider, callable):
        pass

    scrapy.utils.misc.warn_on_generator_with_return_value = warn_on_generator_with_return_value_stub
    scrapy.core.scraper.warn_on_generator_with_return_value = warn_on_generator_with_return_value_stub

    def start_requests(self):
        """Log in via a captured cookie string and schedule every result page.

        Scheduling all pages here (instead of re-yielding pages 2..100 from
        every detail-page callback, as before) issues each page request
        exactly once instead of relying on the duplicate filter to discard
        thousands of repeats.
        """
        cookie = '__permanent_id=20230920231611757816761566318461699; dangdang.com=email=ZDA3NjU1NWFlOTE4NWRlN0BkZG1vYmlsZV91c2VyLmNvbQ==&nickname=&display_id=2358298867535&customerid=l8UnhPkhztLqXMfVrAcE7A==&viptype=c/5GHQl/FjE=&show_name=133****7052; ddscreen=2; __visit_id=20231012101515542981780980223343448; __out_refer=; dest_area=country_id%3D9000%26province_id%3D111%26city_id%3D0%26district_id%3D0%26town_id%3D0; pos_9_end=1697077157295; pos_0_start=1697077157394; pos_0_end=1697077157409; ad_ids=3539000%7C%232; USERNUM=KYktRh56D4dUdZlNxID1Kg==; login.dangdang.com=.ASPXAUTH=1kUydnmmYRdqIlrVed3mVv18ivPovzVc3NyaGVO/DOohvFfXaj9XVg==; MDD_username=133****7052; MDD_custId=q7m4VVqnwYI6F5NvHTgn/A%3D%3D; MDD_channelId=70000; MDD_fromPlatform=307; sessionID=pc_70e940e0219e7b710f2b4f5b11219f145a99a5bfbb4acee70d6dd3c281770f; ddoy=email=d076555ae9185de7@ddmobile_user.com&nickname=&validatedflag=0&uname=13337577052&utype=0&.ALFG=off&.ALTM=1697079067582; LOGIN_TIME=1697079069616; pos_6_start=1697079069798; pos_6_end=1697079070096; __rpm=%7Cp_11557235022...1697079073686; __trace_id=20231012105116720338014286415209942'
        # "k1=v1; k2=v2" -> {"k1": "v1", ...}.  Split each pair on the FIRST
        # '=' only: cookie values themselves contain '=' (base64 padding,
        # embedded key=value payloads) and would be truncated otherwise.
        cookies = dict(item.split('=', 1) for item in cookie.split('; '))
        for page in range(1, self.MAX_PAGE + 1):
            url = f'http://search.dangdang.com/?key=%B2%E9%C0%ED%BE%C5%CA%C0&act=input&page_index={page}'
            yield scrapy.Request(url=url, callback=self.parse, cookies=cookies)

    def parse(self, response):
        """Follow every product link on one search-result page."""
        for item in response.xpath('.//ul[@class="bigimg"]/li'):
            href = item.xpath('.//a/@href').get()
            # Guard against list items without a link; hrefs on this site are
            # protocol-relative ('//product.dangdang.com/...'), which urljoin
            # resolves against the response's scheme (same result as the old
            # 'http:' + href, but it also tolerates relative/absolute hrefs).
            if href:
                yield scrapy.Request(url=response.urljoin(href), callback=self.parse_init)

    def parse_init(self, response):
        """Extract the book title from a product detail page and append it to ./cl.txt."""
        title = response.xpath('//*[@id="product_info"]/div[1]/h1/text()').get()
        if title is None:
            # Layout change or anti-bot interstitial: nothing to record.
            # (Previously an empty SelectorList fell through to f.write()
            # and raised TypeError.)
            return
        title = title.strip()
        print(title)
        with open('./cl.txt', 'a', encoding='utf-8') as f:
            # One title per line; the old code wrote no separator, so all
            # titles ran together on a single line.
            f.write(title + '\n')



















